# -*- coding: utf-8 -*-
"""
@Time ： 2024/3/29 20:39
@Auth ： fcq
@File ：test.py
@IDE ：PyCharm
@Motto：ABC(Always Be Coding)
"""
from transformers import BertTokenizer, AutoTokenizer, BertweetTokenizer, BartTokenizer

# Scratch experiment: encode one (long sentence, short target) pair with a
# BERT tokenizer loaded from a local directory, padding/truncating the result
# to a fixed length.
tokenizer = BertTokenizer.from_pretrained(
    'D:/Projects/bot-detection/BERT_PretrainModel/bert-base-uncased'
)

# 250 copies of the same word, so the first segment alone has more words than
# the max_length requested below.
sent = ['you'] * 250
tar = ['am', 'dad']

# A batch holding a single [text, text_pair] entry.
concat_sent = [[' '.join(sent), ' '.join(tar)]]

encoding_options = dict(
    add_special_tokens=True,  # Add '[CLS]' and '[SEP]'
    max_length=200,  # Target length used for both padding and truncation.
    padding='max_length',
    return_attention_mask=True,  # Also build the attention masks.
    truncation=True,
)
encoded_dict = tokenizer.batch_encode_plus(concat_sent, **encoding_options)

# Marker output once encoding has finished (presumably a breakpoint anchor).
print(1)
